{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "relation_data_dir='E:/文档/数据集/Data Sets used in Computing Education/junyi/'\n",
    "dict_dir='../../../../data/junyi/dict_knowledge_code.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(dict_dir,'r') as f:\n",
    "    filtered_problem_id_old_new=json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "prob_data=pd.read_csv(relation_data_dir+'junyi_Exercise_table.csv')\n",
    "\n",
    "prerequisites=prob_data.loc[:,['name','prerequisites']]\n",
    "prerequisites['name']=prerequisites['name'].map(filtered_problem_id_old_new)\n",
    "prerequisites['prerequisites']=prerequisites['prerequisites'].map(filtered_problem_id_old_new)\n",
    "\n",
    "prerequisites=prerequisites.dropna(how='any').astype('int')\n",
    "prerequisites.columns=['Exercise_A','Exercise_B']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "relation_test=pd.read_csv(relation_data_dir+'relationship_annotation_testing.csv')\n",
    "relation_train=pd.read_csv(relation_data_dir+'relationship_annotation_training.csv')\n",
    "\n",
    "relation_data=pd.concat([relation_test,relation_train]).loc[:,['Exercise_A','Exercise_B','Similarity_avg']]\n",
    "relation_data=relation_data[relation_data['Similarity_avg']>=5].loc[:,['Exercise_A','Exercise_B']]\n",
    "relation_data['Exercise_A']=relation_data['Exercise_A'].map(filtered_problem_id_old_new)\n",
    "relation_data['Exercise_B']=relation_data['Exercise_B'].map(filtered_problem_id_old_new)\n",
    "relation_data=relation_data.dropna(how='any').astype('int')\n",
    "\n",
    "# 相似关系是相互的\n",
    "relation_data_2=pd.DataFrame(columns=['Exercise_A','Exercise_B']) \n",
    "relation_data_2['Exercise_A']=relation_data['Exercise_B']\n",
    "relation_data_2['Exercise_B']=relation_data['Exercise_A']\n",
    "\n",
    "relationship=pd.concat([prerequisites,relation_data,relation_data_2]).drop_duplicates(keep='first')\n",
    "relationship.columns=['concept_1','concept_2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "relationship.to_csv('concept_relationship.csv',index=False)"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "527a93331b4b1a8345148922acc34427fb7591433d63b66d32040b6fbbc6d593"
  },
  "kernelspec": {
   "display_name": "Python 3.8.12 ('pytorch')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
